// Produce Figure 1
set more off

// Load Data
use "all_tasklevel_data.dta", clear

// Ordinal prefixes shared by the two decile label sets defined below
local ordinals 1st 2nd 3rd 4th 5th 6th 7th 8th 9th 10th

// Value labels for abs_tdiff_group: codes 1-10 are decile breaks,
// code 11 marks the first task of the day
forvalues d = 1/10 {
	local ord : word `d' of `ordinals'
	label define dec_ `d' "`ord' Decile Break", add
}
label define dec_ 11 "First Task of Day", add
label values abs_tdiff_group dec_

// Value labels for cluster10_group: codes 1-10 are session-task deciles
forvalues d = 1/10 {
	local ord : word `d' of `ordinals'
	label define dec_2 `d' "`ord' Decile Session Task", add
}
label values cluster10_group dec_2

label variable same_task "Same Task at t-1"

// Build the time groups used as fixed effects in the regressions below.
//
// time_group numbers the distinct (year_a, month_a) cells. egen's group()
// assigns ids in the sort order of its varlist, so year_a has to come first
// for the ids to run chronologically; with month_a first the ids would be
// ordered by month and only then by year.
// NOTE(review): assumes year_a / month_a are calendar year and month -- confirm.
sort year_a month_a
egen time_group = group(year_a month_a)

// quarter_group is the id of the first month of each 3-month window
// (1, 4, ..., 37). Each window's upper bound (`i'+3) coincides with the start
// of the next window; because later iterations overwrite earlier ones, the
// effective windows are three ids wide, except the last one, which also
// absorbs time_group 40. Observations with a missing time_group or an id
// above 40 keep the initial value 0.
gen quarter_group = 0
forvalues i = 1(3)37 {
	replace quarter_group = `i' if inrange(time_group, `i', `i'+3)
}

// Figure 1a
// Empirical CDF of strictly positive task rewards among in-sample
// observations, drawn separately for sex==0 and sex==1.

// pct_<s> holds the 99 percentile values of task_reward for sex==<s>;
// percent_<s> holds the matching percentages (the y-axis of the CDF).
forvalues s = 0/1 {
	pctile pct_`s' = task_reward if sex==`s' & task_reward >0 & in_sample==1, nq(100) genp(percent_`s')
}

twoway line percent_0 pct_0, lwidth(medthick) ///
	|| line percent_1 pct_1, lwidth(medthick) ///
	legend(order(1 "Male" 2 "Female") position(6) rows(1)) ///
	xtitle(Task Reward) ///
	ytitle(CDF) ///
	graphregion(color(white))
graph export "taskreward_ecdf.png", replace

// The percentile helper variables are only needed for this plot
drop pct_* percent_*

// Figure 1b
// Overlaid discrete histograms of task type by sex for in-sample,
// non-returned observations.

// tmp re-codes task_type_ into plot positions 1-6: the k-th code in the list
// below is drawn at x = k. Any other task_type_ value leaves tmp missing.
local type_codes 6 5 10 1 4 2
generate tmp = .
forvalues pos = 1/6 {
	local code : word `pos' of `type_codes'
	replace tmp = `pos' if task_type_ ==`code'
}
label variable tmp "Task Type"

local fig1b_sample "in_sample==1 & returned==0"

twoway histogram tmp if `fig1b_sample' & sex==1, discrete fcolor(red%80) ///
	|| histogram tmp if `fig1b_sample' & sex==0, discrete fcolor(none) ///
	legend(order(1 "Female" 2 "Male") position(6) rows(1)) ///
	xlabel(1 "Research" 2 "Transcription" 3 "Other" ///
		4 "Entry" 5 "Rating" 6 "Verify", angle(45) labsize(small))
graph export "tasktype_ecdf.png", replace

// Figure 1c --- basic regression
// Coefficient on sex in regressions of log_interval_wage with progressively
// richer controls. Every model uses the same sample
// (in_sample==1 & returned==0), includes quarter_group dummies, and clusters
// standard errors on worker_id. Results are stored as work_1 ... work_4.
set more off

// Additional controls per model; each specification nests the previous one
local ctrl_1 ""
local ctrl_2 "log_task_reward"
local ctrl_3 "log_task_reward i.task_type_"
local ctrl_4 "log_task_reward i.task_type_ log_worker_n"

forvalues m = 1/4 {
	reg log_interval_wage sex `ctrl_`m'' ///
		i.quarter_group if in_sample==1 & returned==0, vce(cluster worker_id)
	estimates store work_`m'
}

// The regressor enters every model as `sex', so that is the coefficient name
// keep() must match; keep(sex2) matched no coefficient and left nothing to plot.
// NOTE(review): unlike Figures 1a and 1b, no graph export follows this plot --
// confirm whether the figure is saved elsewhere.
coefplot work_1 work_2 work_3 work_4 , vertical keep(sex)  ///
	ytitle("Gender Wage Gap") ///
	legend(order(1 "Raw" ///
				3 "+ Task Reward" ///
				5 "+ Task Type" ///
				7 "+ Total Experience") ///
			position(6) rows(1)) yline(0) ///
	recast(bar) barwidth(0.1)  ///
	yscale(r(-.45 .05)) ///
	ylabel(-0.4(0.1)0)




